package com.blog.spark.utils

import com.kumkee.userAgent.UserAgentParser

/**
  * Parses a single line of an nginx access log.
  *
  * Expected line layout (fields that contain blanks are wrapped in double quotes):
  * {{{
  * 89.22.166.49 - - [15/Sep/2018:04:04:03 +0800] "GET / HTTP/1.1" 200 649 "-" "Mozilla/5.0 (...)"
  * }}}
  *
  * @author yuyon26@126.com
  * @date 2018/10/6 10:19
  */
object NginxStatOneLog {

  import scala.util.control.NonFatal

  /** Parser used to extract browser and OS from the user-agent field. */
  val userAgentParse: UserAgentParser = new UserAgentParser()

  /** Base address of the site. */
  val domain: String = "https://www.bblog.vip"

  /** Substrings that mark a URL or referer as one that must not be counted. */
  private val ExcludedMarkers: Seq[String] = Seq(
    ".js", ".css", ".jpg", ".png", ".ico", ".xml",
    ".txt", ".php", ".html", ".woff", ".scss"
  )

  /**
    * Parses one access-log line into a tab-separated record.
    *
    * A line is accepted only when all of the following hold:
    *  - the status is exactly `200`;
    *  - the request URL differs from the user-agent string;
    *  - neither the referer nor the request URL is flagged by [[checkNoUrl]];
    *  - the request URL is `/` or contains `/index`.
    *
    * @param log raw log line
    * @return `ip \t DateUtils.parse(time) \t traffic \t browser \t os` for an accepted line;
    *         the empty string when the line is filtered out or cannot be parsed
    *         (a parse failure is additionally reported on standard output)
    */
  def parseOneLog(log: String): String = {
    // Splitting on the double quote yields:
    //   0 -> ip, identity fields and timestamp   1 -> request line   2 -> status and traffic
    //   3 -> referer                             4 -> separator      5 -> user agent
    val logs = log.split("\"")
    try {
      val referer = logs(3)

      // logs(2) looks like " 200 649 "; drop the leading blank before splitting.
      val statusAndTraffic = logs(2).replaceFirst(" ", "").split(" ")
      val status = statusAndTraffic(0)
      val traffic = statusAndTraffic(1)

      // Request line is "METHOD URL PROTOCOL"; a shorter one is treated as unparseable.
      val requestParts = logs(1).split(" ")
      require(requestParts.length >= 3, "request line needs method, url and protocol")
      val url = requestParts(1)

      val userAgentSource = logs(5)

      val accepted =
        status == "200" &&
          url != userAgentSource &&
          !checkNoUrl(referer) &&
          !checkNoUrl(url) &&
          (url == "/" || url.contains("/index"))

      if (!accepted) {
        ""
      } else {
        // The user agent is parsed only for accepted lines, so discarded lines skip that work.
        val agent = userAgentParse.parse(userAgentSource)
        val browser = agent.getBrowser
        val os = agent.getOs

        val ipAndTime = logs(0).split(" ")
        val ip = ipAndTime(0)
        // The timestamp is split across two tokens: "[15/Sep/2018:04:04:03" and "+0800]".
        val time = ipAndTime(3) + " " + ipAndTime(4)

        s"$ip\t${DateUtils.parse(time)}\t$traffic\t$browser\t$os"
      }
    } catch {
      // NonFatal lets JVM-fatal errors and thread interruption propagate instead of hiding them.
      case NonFatal(_) =>
        println(s"解析[${log}]失败")
        ""
    }
  }

  /**
    * Tells whether a URL or referer must be excluded from the statistics.
    *
    * The check is a plain substring match, so a marker anywhere in `str` triggers it.
    *
    * @param str URL or referer to inspect
    * @return `true` when `str` contains any of `.js`, `.css`, `.jpg`, `.png`, `.ico`, `.xml`,
    *         `.txt`, `.php`, `.html`, `.woff` or `.scss`
    */
  def checkNoUrl(str: String): Boolean =
    ExcludedMarkers.exists(marker => str.contains(marker))

  // Example of a line in the expected format:
  //89.22.166.49 - - [15/Sep/2018:04:04:03 +0800] "GET / HTTP/1.1" 200 649 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
  /**
    * Manual smoke test: parses one hard-coded sample line and prints the result.
    *
    * The sample requests an absolute URL (neither `/` nor containing `/index`),
    * so it is filtered out and an empty result is printed.
    */
  def main(args: Array[String]): Unit = {
    val sampleLine = "36.5.181.141 - - [14/Sep/2018:21:01:35 +0800] \"GET http://www.ip.cn/ HTTP/1.1\" 200 649 \"-\" \"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36\""
    print("结果：" + parseOneLog(sampleLine))
  }
}
